import os

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Fetch the staff page.
url = 'https://nerc-ebd.ccnu.edu.cn/sysgk/szdw.htm'
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
response.raise_for_status()
# Force UTF-8 decoding of the body rather than relying on the detected charset.
response.encoding = 'utf-8'

# Parse the downloaded HTML.
soup = BeautifulSoup(response.text, 'html.parser')

# Locate the first <table> in the document; the teacher list is read from it.
table = soup.find('table')
if table is None:
    # find() returns None when nothing matches; stop with a clear message
    # instead of the AttributeError the next line would otherwise raise.
    raise RuntimeError('No <table> element found in the page; the layout may have changed.')

# Collect every row of the table.
rows = table.find_all('tr')

# Walk the rows and collect [name, title] pairs.
teachers = []
for row in rows[1:]:  # the first row is skipped as the header row
    cols = row.find_all('td')
    if len(cols) <= 2:
        # Fewer than three cells: the name/title columns are not both present.
        continue
    name = cols[1].text.strip()
    title = cols[2].text.strip()
    # Drop rows whose cells repeat the column headings.
    if name != "姓名" and title != "职称":
        teachers.append([name, title])

# Build a DataFrame from the collected [name, title] pairs.
df = pd.DataFrame(teachers, columns=['Name', 'Professional Titles'])

# Save to CSV.
# The directory is derived from the output path so that the folder created is
# always the folder written into. Previously 'result_CNKI_ZL' was created while
# the file was written to 'result_CNKI/', which fails when 'result_CNKI' does
# not already exist.
# NOTE(review): if 'result_CNKI_ZL' was the intended destination, change
# output_path accordingly — the original write path is kept here.
output_path = 'result_CNKI/teachers.csv'
output_dir = os.path.dirname(output_path)
os.makedirs(output_dir, exist_ok=True)
# 'utf-8-sig' writes a UTF-8 byte-order mark at the start of the file.
df.to_csv(output_path, index=False, encoding='utf-8-sig')
